\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{url}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 使表格美观
\usepackage{array}
\newcolumntype{M}[1]{>{\centering\arraybackslash}m{#1}}
\setlength\extrarowheight{3pt}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 插入代码
\usepackage{listings}
\usepackage{color}

% 设置列表的样式
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},   
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,         
    breaklines=true,                 
    captionpos=b,                    
    keepspaces=true,                 
    numbers=left,                    
    numbersep=5pt,                  
    showspaces=false,                
    showstringspaces=false,
    showtabs=false,                  
    tabsize=2
}

\lstset{style=mystyle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{R语言统计入门第5章：单样本和双样本检验 }
\author{PD ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{目录 One- and two-sample tests }

\begin{enumerate}
\item[5.1.] 单样本 t 检验 One-sample t test 
\item[5.2.] Wilcoxon 符号秩和检验 Wilcoxon signed-rank test 
\item[5.3.] 两样本 t 检验 Two-sample t test 
\item[5.4.] 比较方差 Comparison of variances 
\item[5.5.] 两样本 Wilcoxon 检验 Two-sample Wilcoxon test 
\item[5.6.] 配对 t 检验 The paired t test
\item[5.7.] 配对 Wilcoxon 检验 The matched-pairs Wilcoxon test
\item[5.8.] 书中习题 Exercises
\item[5.9.] 单项选择题
\item[5.10.] 简答题 
\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{课程讲解重点难点 }

\begin{enumerate}

\item  均值的标准误，单侧检验与双侧检验，显著性水平，t统计量。

\item  t.test函数的输入参数与输出结果，mu参数，alternative参数。

\item  非参数检验的思路，Wilcoxon秩和检验的步骤。

\item  wilcox.test函数的输入参数与输出，V统计量。

\item  均值差的标准误，经典计算方法和Welch方法。

\item  方差相等的检验，var.test函数。

%\item  配对检验与两样本检验的区别。

%\item  单样本的t检验和符号秩和检验
%\item  两样本的t检验和符号秩和检验
%\item  配对样本的t检验和符号秩和检验
%\item 三种情形的样本：单样本，双样本，配对样本
%\item 两种检验方法：t检验，Wilcoxon符号秩和检验

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.1.a. 单样本 t 检验的原理 One-sample t test}

\begin{itemize}
\item  {\color{red}问题：解释单样本 t 检验的原理与过程。}
\item  解答：

\begin{enumerate}
\item 设样本 $(X_1,X_2,\cdots,X_n)$ 来自正态分布的总体 $X\sim N(\mu,\sigma^2)$, 其中方差 $\sigma^2$ 未知。
我们要对总体均值进行假设检验：$$ H_0: \mu=\mu_0,\,\, \textrm{v.s.} \,\, H_1:\mu\neq \mu_0. $$

\item  定义样本均值、样本方差与 t 统计量。在零假设成立时，$T\sim t(n-1)$.
\[ \overline{X}=\frac{1}{n}\sum\limits_{i=1}^{n} X_i, \,\, 
S^2 = \frac{1}{n-1}\sum\limits_{i=1}^{n} (X_i-\overline{X})^2,\,\,
T= \frac{\bar{X}-\mu_0}{S/\sqrt{n}}. \] 

\item 代入数据，若统计值落在拒绝域，即 $|t|>t_{\alpha/2}(n-1)$, 则拒绝零假设。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.1.b. 均值的标准误 SEM = standard error of the mean}

\begin{itemize}
\item  {\color{red}问题：什么是均值的标准误？}

\item  答：均值的标准误 SEM 是指样本均值 $\overline{X}$ 的标准差。
设独立同分布的每个样本个体 $X_i$ 的标准差为 $\sigma$, 则样本容量为 $n$ 的样本均值  $\overline{X}$ 的方差为 
$$Var(\overline{X}) = \frac{1}{n^2} \sum_{i=1}^{n} Var(X_i) = \frac{1}{n^2} \sum_{i=1}^{n} \sigma^2 = \frac{\sigma^2}{n}.$$
所以样本均值 $\overline{X}$ 的理论标准差为 $$ SEM = \frac{\sigma}{\sqrt{n}}. $$

%\item The SEM describes the variation of the average of $n$ random values with mean $\mu$ and variance $s^2$. This value is 

\item  总体标准差 $\sigma$ 未知，用样本标准差 $s$ 代替，所以均值的标准误为
$$SEM = \frac{s}{\sqrt{n}}. $$
%\item  均值的方差变小，直观意义是均值落在总体均值的附近的可能性更大。


%\item It means that if you were to repeat the entire experiment several times and calculate an average for each experiment, then these averages would follow a distribution that is narrower than that of the original distribution. 
%\vspace{0.3cm}
%\item[2.] 解释 t 统计量的计算公式。
%\item 设要检验均值是否等于 $\mu_0$, 则使用的 t 统计量为 
%$$ T = \frac{\overline{X}-\mu_0}{SEM}. $$

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.1.c. p值 p-values }

\begin{itemize}
\item  {\color{red}问题：解释 p 值的概念。}
\item  解答：
\begin{itemize}
\item The p-value is the probability of obtaining a value {\color{blue}as {\bf numerically} large as or larger than} the observed $t$. 
\item The process rejects the hypothesis if the p-value is less than the significance level.
\end{itemize}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.1.d. 双侧检验和单侧检验的 p 值 }

\begin{itemize}

\item  {\color{red}问题：分别写出双侧检验和单侧检验的 p 值的计算公式。}

\item  解答：记 $t$ 是根据样本数据算出来的统计值，
$$t = \frac{\bar{x}-\mu_0}{s/\sqrt{n}} .$$

\begin{itemize}
\item 双侧检验 $H_0:\mu=\mu_0\,\, \text{vs.} \,\, H_1:\mu\neq \mu_0$, 计算p值：$p= \mathbb{P}(|T|>|t|)$. 
\item 单侧检验 $H_0:\mu\le\mu_0\,\, \text{vs.} \,\, H_1:\mu > \mu_0$, 计算p值：$p= \mathbb{P}(T>t)$. 
\item 单侧检验 $H_0:\mu\ge\mu_0\,\, \text{vs.} \,\, H_1:\mu < \mu_0$, 计算p值：$p= \mathbb{P}(T<t)$. 

\end{itemize}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.1.e. 双侧检验的 p 值 }

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.7\textwidth, keepaspectratio]{plot-5-1-3.png}
%\caption{The area is the p value of a both-side hypothetical test}
\caption{阴影部分面积是双侧检验的p值}
\end{figure}

%data=intake$pre
%tvalue=(mean(data)-7725)/(sd(data)/sqrt(11))
%pvalue=2*pt(tvalue,10)
%
%x<-seq(-4,4,0.1)
%y=dt(x,10)
%plot(x,y,type='l')
%abline(h=0)
%x1=x[x<=tvalue]
%y1=y[x<=tvalue]
%x2=x[x>=-tvalue]
%y2=y[x>=-tvalue]
%polygon(c(x2,tail(x2,1),head(x2,1)), c(y2,0,0), col='blue')
%polygon(c(x1,tail(x1,1),head(x1,1)), c(y1,0,0), col='blue')

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.1.f.  Example: daily energy intake }

\begin{itemize}
\item  {\color{red}问题：Here is an example concerning daily energy intake in kJ for 11 women (Altman, 1991, p. 183). 
检验这些数据的均值是否等于 7725. }

\begin{enumerate}
\item 解答第一步，输入数据。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
daily.intake <- c(5260,5470,5640,6180,6390,6515,
+ 6805,7515,7515,8230,8770)
\end{lstlisting}

\item 解答第二步，查看汇总统计量。发现样本均值为 6753.6.% 似乎明显小于 7725. 
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
mean(daily.intake)
sd(daily.intake)
quantile(daily.intake)
\end{lstlisting}


\item 解答第三步，使用 t 检验，读出结果：拒绝均值为 7725 的原假设。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
t.test(daily.intake,mu=7725)
\end{lstlisting}

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.1.g.  Example }

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> t.test(daily.intake,mu=7725)

         One Sample t-test
data:  daily.intake
t = -2.8208, df = 10, p-value = 0.01814
alternative hypothesis: true mean is not equal to 7725
95 percent confidence interval:
 5986.348 7520.925
sample estimates:
mean of x
 6753.636
\end{lstlisting}


\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.1.h. Example  }

\begin{itemize}
\item  {\color{red}问题：按定义直接计算 t 统计值和 p 值。}

\item 解答：计算公式分别为
$$ t = \frac{\bar{x}-\mu_0}{s/\sqrt{n}}, \,\,\, p=\mathbb{P}(|T|>|t|). $$

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
x <- c(5260,5470,5640,6180,6390,6515,6805,7515,7515,
+  8230,8770)
tvalue <- (mean(x)-7725)/(sd(x)/sqrt(11))
pvalue <- 2*pt(tvalue,10)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.1.i. Example   }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.7\textwidth, keepaspectratio]{daily-intake-t-curve.png}
 \caption{probability density function and the t statistic}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.a. Wilcoxon 符号秩和检验 Wilcoxon signed-rank test}

\begin{itemize}
\item  {\color{red}问题：什么是 Wilcoxon 符号秩和检验？ }
\item 解答：是一种非参数检验，对总体不用假设服从正态分布。

\item  {\color{red}问题：解释 Wilcoxon 符号秩和检验的步骤。}
\item 解答：

\begin{enumerate}
\item 将每个数据都减去待检验的均值。
\item 将得到的差值按绝对值从小到大分配排名 $1,2,\cdots,n$. 
\item 计算差值符号为正的那些数据的排名的和，记为 $V$.
\item 若真实均值接近待检验的均值，则 $V$ 的值在某个范围内。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.b. Wilcoxon signed-rank test}

\begin{itemize}

\item  {\color{red}问题：t 检验的适用场景是什么？ }

\item 解答：

\begin{enumerate}

\item  The $t$ tests are fairly robust against departures from the normal distribution especially in larger samples, but sometimes you wish to avoid making that assumption. 

\item  To this end, the distribution-free methods are convenient. 

\item  These are generally obtained by replacing data with corresponding {\color{blue}order statistics}.

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.c. Wilcoxon signed-rank test}

\begin{itemize}


\item  {\color{red}问题：解释 Wilcoxon 符号秩和检验的思路。}

\item 解答：

\begin{enumerate}

\item  For the one-sample Wilcoxon test, the procedure is to subtract the theoretical $\mu_0$ and rank the differences according to their numerical value, ignoring the sign, and then calculate the sum of the positive or negative ranks. 

\item  The point is that, assuming only that the distribution is symmetric around $\mu_0$, the test statistic corresponds to selecting each number from $1$ to $n$ with probability $1/2$ and calculating the sum. 

\item  The distribution of the test statistic can be calculated exactly, at least in principle. It becomes computationally excessive in large samples, but the distribution is then very well approximated by a normal distribution.

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.d. Wilcoxon 符号秩和检验 }

\begin{itemize}


\item  {\color{red}问题：量化叙述 Wilcoxon 符号秩和检验。}

\item 解答：

\begin{enumerate}

\item  设一个连续总体关于某个参数 $\theta$ 对称，要检验的假设为
$$ H_0: \theta=0 \,\,\text{ v.s. }\,\, H_1: \theta\neq 0. $$

\item  设 $x_1,x_2,\cdots,x_n$ 是样本，计算 $|x_i|$ 在 $(|x_1|, |x_2|, \cdots, |x_n|)$ 中的秩，记为 $R_i$. 

\item  计算符号秩和统计量
$$W^+ = \sum\limits_{i=1}^{n} R_i I(x_i>0). $$

\item  拒绝域为 $$\{ W^+\le W^+_{\alpha/2}(n) \} \cup \{ W^+\ge W^+_{1-\alpha/2}(n) \}.$$

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.e. Wilcoxon 符号秩和检验 }

\begin{itemize}


\item  {\color{red}问题：解释 Wilcoxon 符号秩和检验的正态近似方法。}

\item 解答：在零假设 $H_0: \theta=0$ 成立时，有
$$E(W^+) = \frac{n(n+1)}{4}, \,\, Var(W^+) = \frac{n(n+1)(2n+1)}{24},$$
因此当 $n>50$ 时可用正态近似来计算拒绝域，
$$ \frac{W^+ - \frac{n(n+1)}{4}}{\sqrt{\frac{n(n+1)(2n+1)}{24}}} \,\,\dot\sim\,\, N(0,1). $$


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.2.f. Example: daily energy intake }

\begin{itemize}

\item {\color{red}问题：对 daily.intake 数据检验 $\mu_0=7725$, 使用符号秩和检验。}

\item 解答：结果显示，拒绝原假设，认为均值显著不等于 7725.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> daily.intake <- c(5260,5470,5640,6180,6390,6515,6805,7515,7515,8230,8770)
> wilcox.test(daily.intake, mu=7725)

         Wilcoxon signed rank test with continuity correction
data:  daily.intake
V = 8, p-value = 0.0293
alternative hypothesis: true location is not equal to 7725
Warning message:
In wilcox.test.default(daily.intake, mu = 7725) :
  cannot compute exact p-value with ties
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.2.g. Example }

\begin{itemize}

\item  {\color{red}问题：按符号秩和检验的步骤，计算统计值和 p 值。}

\item  解答：这里使用正态近似计算 p 值。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> x <- c(5260,5470,5640,6180,6390,6515,6805,7515,7515,8230,8770)
> xs <- x-7725
> v <- sum(rank(abs(xs))*(xs>0))
> n=length(x)
> vn <- (v-n*(n+1)/4)/sqrt(n*(n+1)*(2*n+1)/24)
> pvalue <- pnorm(vn)*2
> v
[1] 8
> vn
[1] -2.222771
> pvalue
[1] 0.02623124
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.g. Example }

\begin{itemize}

\item  {\color{red}问题：计算统计量 $V$ 的分布列，精确计算 p 值。} 

\item  解答：在数字排名（秩） $1,2,3,\cdots,n=11$ 中，每个数字以1/2的概率选中，计算选中的数字的和（秩和 $V$），记录取到这个和的概率。共有 $2^{11}=2048$ 种情况。合并秩和相同的情况。

{\footnotesize
\begin{table}[ht!]
\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|c|c|}\hline 
编号 &1&2&3&4&5&6&7&8&9&10&11& 秩和 $V$ \\ \hline 
1&x&x&x&x&x&x&x&x&x&x&x& 0 \\ \hline 
2&$\surd$&x&x&x&x&x&x&x&x&x&x& 1 \\ \hline 
3&x&$\surd$&x&x&x&x&x&x&x&x&x& 2 \\ \hline 
4&x&x&$\surd$&x&x&x&x&x&x&x&x& 3 \\ \hline 
5&$\surd$&$\surd$&x&x&x&x&x&x&x&x&x& 3 \\ \hline 
$\vdots$ &&&&&&&&&&&& $\vdots$ \\ \hline 
2048&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$&$\surd$& 66 \\ \hline 
\end{tabular}
\end{table}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.2.h. Example  }

{\footnotesize
\begin{table}[ht!]
\begin{tabular}{|c|c|c|c|c|c|}\hline 
index & $x$ & $x-\mu_0$ & $\textrm{sign}(x-\mu_0)$ & $\textrm{abs}(x-\mu_0)$ & rank \\ \hline 
1 & 5260  &  $-2465$ &  $-$ & $2465$ & 11 \\ \hline 
2 & 5470  &  $-2255$ &  $-$ & $2255$ & 10   \\ \hline 
3 & 5640  &  $-2085$ &  $-$ & $2085$ & 9   \\ \hline 
4 & 6180  &  $-1545$ &  $-$ & $1545$ & 8   \\ \hline 
5 & 6390  &  $-1335$ &  $-$ & $1335$ & 7   \\ \hline 
6 & 6515  &  $-1210$ &  $-$ & $1210$ & 6   \\ \hline 
7 & 6805  &   $-920$  &  $-$ & $920$ & 4   \\ \hline 
8 & 7515  &  $-210$ &  $-$ & $210$ & 1.5   \\ \hline 
9 & 7515  &   $-210$ &  $-$ & $210$ & 1.5   \\ \hline 
10 & 8230  &  $505$ &  $+$ & $505$ & 3  \\ \hline 
11 & 8770 &   $1045$ &  $+$ & $1045$ & 5 \\ \hline 
\end{tabular}
\end{table}
}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.3.a. 双样本 t 检验的步骤 Two-sample t test}

\begin{itemize}
%\item[1.] 问题：写出双样本均值检验的步骤。
\item[1.] 假设总体是正态分布 $X\sim N(\mu_1,\sigma_1^2)$, $Y\sim N(\mu_2,\sigma_2^2)$, 方差未知。
\item[2.] 设有样本数据：$(x_1,x_2,\cdots,x_n)$ 与 $(y_1,y_2,\cdots,y_m)$.
\item[3.] 检验均值是否相等：$H_0: \mu_1=\mu_2,\,\,\textrm{v.s.} \,\, H_1:\mu_1\neq \mu_2$. 
\item[4.] 记 $\overline{X}=\frac{1}{n}\sum\limits_{i=1}^{n} X_i$, 
$\overline{Y}=\frac{1}{m}\sum\limits_{j=1}^{m} Y_j$ 为样本均值，定义 t 统计量
\[ T= \frac{\overline{X}-\overline{Y}}{SEDM}, \,\, \text{其中}\,\, SEDM= \sqrt{\frac{S_1^2}{n}+\frac{S_2^2}{m}}.\] 
\item[5.] 在原假设成立时，若两总体方差相等并改用合并方差计算 $SEDM$（经典方法），则 $T\sim t(n+m-2)$；按上式计算 $SEDM$ 时（Welch 方法），$T$ 近似服从 t 分布，自由度由 Welch 公式给出。由此得出拒绝域。
\item[6.]  代入数据，计算 t 值与 p 值。得出检验结论。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.3.b. Example: daily energy expenditure data}

\begin{itemize}
\item  {\color{red}问题：载入 \,{\color{blue}\texttt{energy}}, 检验不同 \,{\color{blue}\texttt{stature}} 的 \,{\color{blue}\texttt{expend}} 是否有显著差异。}

\item 解答：使用 \,{\color{blue}\texttt{t.test()}} 函数，参数为模型公式\,{\color{blue}\texttt{expend \textasciitilde{} stature}}.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> library(ISwR)
> attach(energy)
> t.test(expend ~ stature)

	Welch Two Sample t-test
data:  expend by stature
t = -3.8555, df = 15.919, p-value = 0.001411
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -3.459167 -1.004081
sample estimates:
 mean in group lean mean in group obese 
           8.066154           10.297778 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.3.c. Example  }

\begin{itemize}
\item  {\color{red}问题：解释上一页的结果。}
%\item 解答：
    \begin{itemize}\itemsep0.1cm
    \item 这是两样本的均值差异的检验，$t$统计值为 $-3.85$, 自由度 $15.9$.
    \item 模型公式表示用因子变量 \,{\color{blue}\texttt{stature}} 来对 \,{\color{blue}\texttt{expend}} 进行分类。
    \item $p$值为 $0.0014$, 表明均值有显著差异。
    \item 置信区间是均值之差 $\mu_1-\mu_2$ 的置信区间。
    \item 置信度为 $95\%$ 的置信区间不包含零点，也表明均值有显著差异。
    \item 这是使用 Welch 方法的 $t$ 检验，不假设两样本的方差相等。
    \end{itemize}

\item  {\color{red}问题：假设方差相等，重新检验均值是否有显著差异。}
%\item 解答：
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(energy)
> t.test(expend~stature, var.equal=T)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.4. 比较方差 Comparison of variances }

\begin{itemize}
\item  {\color{red}问题：检验不同 \,{\color{blue}\texttt{stature}} 的 \,{\color{blue}\texttt{expend}} 的这两组数据的方差是否相等。}

\item 解答：检验结果不显著，无法拒绝方差相等的原假设。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(energy)
> var.test(expend~stature)

	F test to compare two variances
data:  expend by stature
F = 0.78445, num df = 12, denom df = 8, p-value = 0.6797
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.1867876 2.7547991
sample estimates:
ratio of variances 
          0.784446 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.5.a. 两样本秩和检验 Two-sample Wilcoxon test}

\begin{itemize}
\item  {\color{red}问题：解释两样本 Wilcoxon 秩和检验 (Wilcoxon rank sum test) 的原理。}
\item  解答：

\begin{enumerate}

\item You might prefer a nonparametric test if you doubt the normal distribution assumptions of the t test. 

\item  如果不确定总体是否服从正态分布，可以使用非参数检验。

\item The two-sample Wilcoxon test is based on replacing the data by their rank (without regard to grouping) and calculating the sum of the ranks in one group, thus reducing the problem to one of sampling $n_1$ values without replacement from the numbers $1$ to $n_1 + n_2$.

\item  两样本秩和检验的思路：先不分组，计算数据的排名次序，然后将其中一组的排名加起来。
如果两组数据的均值没有显著差异，那么就相当于在排名 $1,2,\cdots,n_1+n_2$ 中随机选取 $n_1$ 个数字。

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.5.b. Example: daily energy expenditure data }

\begin{itemize}
\item {\color{red}问题：用 Wilcoxon 秩和检验方法，检验不同 \,{\color{blue}\texttt{stature}} 的 \,{\color{blue}\texttt{expend}} 是否有显著差异。}

\item 解答：看到 $p$ 值小于 $\alpha$, 说明两组数据的均值有显著差异。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(energy)
> wilcox.test(expend~stature)

	Wilcoxon rank sum test with continuity correction
data:  expend by stature
W = 12, p-value = 0.002122
alternative hypothesis: true location shift is not equal to 0

\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.5.c. Example }

\begin{itemize}

\item {\color{red}问题：解释上述结果中的 $W$ 统计值的计算方法。}

\item  解答：

\begin{enumerate}

\item  The test statistic $W$ is the sum of ranks in the first group minus its theoretical minimum, i.e., it is zero if all the smallest values fall in the first group.

\item  Some textbooks use a statistic that is the sum of ranks in the smallest group with no minimum correction, which is of course equivalent. 

\item  Notice that, as in the one-sample example, we are having problems with ties and rely on the approximate normal distribution of $W$.

\end{enumerate}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.5.d. Example }

\begin{itemize}

\item {\color{red}问题：直接计算加以验证。}

\item  解答：

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> # library(ISwR)
> # attach(energy)
> tapply(expend,stature,length)
 lean obese 
   13     9 
> W = sum(rank(expend)*(stature == 'lean')) - sum(1:13)
[1] 12
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.5.e. 茆诗松书例子7.6.7. }

\begin{itemize}

\item {\color{red}问题：对某种羊绒可利用先进的工艺处理其含脂率。为比较处理效果，收集6组处理前的羊绒和5组处理后的羊绒，测得其含脂率数据如下: 
\begin{center}
%\begin{table}[ht!]
\begin{tabular}{|c|cccccc|}\hline 
处理前 & 0.20 & 0.24 & 0.66 & 0.42 & 0.12 & 0.25  \\ \hline 
处理后 & 0.13 & 0.07 & 0.21 & 0.08 & 0.19 &   \\ \hline 
\end{tabular}
%\end{table}
\end{center}
试问处理后的含脂率是否明显下降了? ($\alpha = 0.05$)
}

\item  解答：将两组样本混合后，从小到大排序，求出相应的秩如下表。
计算秩和统计量的值为 $$ W= 1+2+4+5+7 =19. $$
本题中 $m=6$, $n=5$. 若取 $\alpha = 0.05$, 查表知 $W_{0.05}(6,5)= 20$, 从而拒绝域为 $\{W<20\}$. 
此处检验统计量值为 19, 所以拒绝原假设，即认为处理后的含脂率下降了。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.5.f. 例子7.6.7. }

{\scriptsize
\begin{center}
%\begin{table}[ht!]
\begin{tabular}{|M{2cm}|M{2cm}|M{2cm}|}\hline 
混合样本 & 秩 & 分组  \\ \hline 
0.07 & 1 & 处理后  \\ \hline 
0.08 & 2 & 处理后  \\ \hline 
0.12 & 3 &   \\ \hline 
0.13 & 4 & 处理后  \\ \hline 
0.19 & 5 & 处理后  \\ \hline 
0.20 & 6 &   \\ \hline 
0.21 & 7 & 处理后  \\ \hline 
0.24 & 8 &   \\ \hline 
0.25 & 9 &   \\ \hline 
0.42 & 10 &   \\ \hline 
0.66 & 11 &   \\ \hline 
\end{tabular}
%\end{table}
\end{center}
}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.6.a. 配对 t 检验 The paired t test}

\begin{itemize}
\item {\color{red}问题：解释配对检验的使用场景。}

\item  解答：

\begin{enumerate}
\item Paired tests are used when {\color{blue}there are two measurements on the same experimental unit}. 
\item The theory is essentially based on taking differences and thus reducing the problem to that of a one-sample test. 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.6.b. 配对数据的差的分布是否一样 }

\begin{itemize}
\item {\color{red}问题：什么是 Bland - Altman 图？}

\item  解答：

\begin{enumerate}

\item It is implicitly assumed that {\color{blue}such differences have a distribution that is independent of the level}. 

\item A useful graphical check is to make a scatter plot of the pairs with the line of identity added or to plot {\color{blue}the difference against the average} of the pair (sometimes called a Bland - Altman plot).

\item If there seems to be a tendency for the dispersion to change with the level, then it may be useful to transform the data; frequently the standard deviation is proportional to the level, in which case a logarithmic transformation is useful.

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.6.c.  }

 \begin{figure}
 \centering
 \includegraphics[height=0.6\textheight, width=0.8\textwidth, keepaspectratio]{plot-5-6-2.png}
 \caption{A Bland-Altman plot}
 \end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.6.d. 配对数据检验的例子 }

\begin{itemize}
\item {\color{red}问题：载入 \,{\color{blue}\texttt{intake}} 数据，检验 \,{\color{blue}\texttt{pre}} 和 \,{\color{blue}\texttt{post}} 数据是否有显著差异。}

\item 解答：每行数据是同一个人，所以使用配对检验。结果是有显著差异。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(intake)
> intake
> t.test(pre,post,paired=T)

	Paired t-test
data:  pre and post
t = 11.941, df = 10, p-value = 3.059e-07
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 1074.072 1566.838
sample estimates:
mean of the differences 
               1320.455 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.7.a. 配对的符号秩和检验 The matched-pairs Wilcoxon test}

\begin{itemize}
\item {\color{red}问题：不假设数据的正态分布特征，检验 \,{\color{blue}\texttt{intake}} 数据的 \,{\color{blue}\texttt{pre}} 和 \,{\color{blue}\texttt{post}} 数据是否有显著差异。}

\item 解答：使用配对数据的 Wilcoxon 检验，加参数 \,{\color{blue}\texttt{paired=T}} 来实现。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> wilcox.test(pre,post,paired=T)

    Wilcoxon signed rank test with continuity correction
data:  pre and post
V = 66, p-value = 0.00384
alternative hypothesis: true location shift is not equal to 0

\end{lstlisting}

\item 结果显示 $p$ 值小于 $\alpha$, 说明在前后的能量摄入有显著差异。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.7.b.  }

\begin{itemize}

\item {\color{red}问题：按定义直接计算，验证统计量 $V=66$, 以及 $p=0.00384$. }

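\item 解答：因为11个人的能量摄入都有所减少，所以 $$V=1+2+\cdots+11=66.$$ 
$p$ 值由带连续性修正的正态近似得到：$E(V)=\frac{n(n+1)}{4}=33$, 差值中有一对结，修正后 $Var(V)=\frac{n(n+1)(2n+1)}{24}-\frac{2^3-2}{48}=126.375$, 于是 $z=\frac{66-33-0.5}{\sqrt{126.375}}\approx 2.891$, $p=2\,\mathbb{P}(Z>z)\approx 0.00384$.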

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.1. Exercise. }

\begin{itemize}

\item  %5.1 
Do the values of the \,{\color{blue}\texttt{react}} data set (notice that this is a single vector, not a data frame) look reasonably normally distributed? Does the mean differ significantly from zero according to a $t$ test?

%\vspace{0.3cm}

\item  Which statement is incorrect?
\begin{enumerate}[(a)]
\item The distribution appears reasonably normal. 
\item The t test has a statistic $t=-7.75$. 
\item The 95 percent confidence interval is $[-0.9985, -0.5943]$. 
\item The mean does not differ from zero significantly. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.1. Exercise - Answer. }

\begin{itemize}

\item  %5.1 
(d). The distribution appears reasonably normal, with some discretization effect and two weak outliers, one at each end. There is a significant difference from zero ($t=-7.75$, $p=1.1 \times 10^{-13}$).

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
qqnorm(react)
t.test(react)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.2. Exercise. }

\begin{itemize}

\item  %5.2 
In the data set \,{\color{blue}\texttt{vitcap}}, use a $t$ test to compare the vital capacity for the two groups. Calculate a 99\% confidence interval for the difference. The result of this comparison may be misleading. Why?

%\vspace{0.3cm}

\item  Which statement is incorrect?
\begin{enumerate}[(a)]
\item The 99 percent confidence interval is $[-2.0644, -0.0222]$. 
\item The t test has a statistic $t=-2.9228$. 
\item The p value of the test is $p=0.008724$. 
\item The age does not differ by group. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.2. Exercise - Answer. }

\begin{itemize}

\item  %5.2 
(d) The fact that \,{\color{blue}\texttt{age}} also differs by group may cause bias.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
t.test(vital.capacity~group,conf=0.99,data=vitcap)
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.3. Exercise. }

\begin{itemize}

\item  %5.3 
Perform the analyses of the \,{\color{blue}\texttt{react}} and \,{\color{blue}\texttt{vitcap}} data using nonparametric techniques. 

\item  Which statement is incorrect? ($\alpha=0.01$)
\begin{enumerate}[(a)]
\item The Wilcoxon signed rank test shows the mean of react is not equal to zero. 
\item The Wilcoxon rank sum test shows the difference of the means of vital.capacity in the two groups is not equal to zero.
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.3. Exercise - Answer. }

\begin{itemize}

\item  %5.3 
(b) This is quite parallel to \,{\color{blue}\texttt{t.test}} usage.  

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> wilcox.test(react)

	Wilcoxon signed rank test with continuity correction

data:  react
V = 9283.5, p-value = 2.075e-13
alternative hypothesis: true location is not equal to 0

> wilcox.test(vital.capacity~group, data=vitcap)

	Wilcoxon rank sum test with continuity correction

data:  vital.capacity by group
W = 30.5, p-value = 0.01783
alternative hypothesis: true location shift is not equal to 0
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.4. Exercise. }

\begin{itemize}

\item  %5.4 
Perform graphical checks of the assumptions for a paired $t$ test in the \,{\color{blue}\texttt{intake}} data set.

%\item  Which statement is incorrect?
%\begin{enumerate}[(a)]
%\item 
%\item 
%\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.4. Exercise - Answer. }

\begin{itemize}

\item  %5.4 
The following builds 
a post-vs.-pre plot, 
a difference-vs.-average (Bland-Altman) plot, 
and a histogram 
and a QQ plot of the differences.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
attach(intake)
opar <- par(mfrow=c(2,2))
plot(post ~ pre)
abline(0,1)
plot((post+pre)/2, post - pre, ylim=range(0,post-pre))
abline(h=0)
hist(post-pre)
qqnorm(post-pre)
detach(intake)
par(opar)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.5. Exercise. }

\begin{itemize}

\item  %5.5 
The function \,{\color{blue}\texttt{shapiro.test}} computes a test of normality based on the degree of linearity of the QQ plot. Apply it to the \,{\color{blue}\texttt{react}} data. Does it help to remove the outliers?

\item  Which statement is incorrect?
\begin{enumerate}[(a)]
\item The react data is not normally distributed. 
\item The react data with outliers removed is normally distributed. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.5. Exercise - Answer. }

\begin{itemize}

\item  %5.5 
(b) The outliers are the first and last observations in the (sorted) data vector and can be removed as follows. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> shapiro.test(react)
	Shapiro-Wilk normality test
data:  react
W = 0.95701, p-value = 2.512e-08
> shapiro.test(react[-c(1,334)])
	Shapiro-Wilk normality test
data:  react[-c(1, 334)]
W = 0.96869, p-value = 1.377e-06
> qqnorm(react[-c(1,334)])
> qqnorm(react)
\end{lstlisting}

\item  The test comes out highly significant even with outliers removed because it picks up the discretization effect in the otherwise nearly straight-line \,{\color{blue}\texttt{qqnorm}} plot.

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.6. Exercise. }

\begin{itemize}

\item  %5.6 
The crossover trial in \,{\color{blue}\texttt{ashina}} can be analyzed for a drug effect in a simple way if you ignore a potential period effect. However, you can do better. Hint: Consider the intra-individual differences; if there were only a period effect present, how should the differences behave in the two groups? Compare the results of the simple method and the improved method.

\item  Which statement is incorrect?  ($\alpha=0.01$)
\begin{enumerate}[(a)]
\item  A cross-over trial uses the trial participant as their own control. 
\item  In a cross-over trial, each participant gets more than one treatment and we compare the outcome on the two treatments on the same participant. %They are also known as change-over trials.
\item  The paired t test shows the true difference in means is not equal to zero. 
\item  The t test with a period effect shows no evidence for the treatment effect. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.6. Exercise - Answer. }

\begin{itemize}

\item  %5.6 
(d) A paired $t$ test is appropriate if there is no period effect. However, even with a period effect (assumed additive), you would expect the difference between the two periods to be the same in both groups if there were no effect of treatment. This can be used to test for a treatment effect. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(ashina)
> t.test(vas.active,vas.plac, paired=T)

	Paired t-test

data:  vas.active and vas.plac
t = -3.2269, df = 15, p-value = 0.005644
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -71.1946 -14.5554
sample estimates:
mean of the differences 
                -42.875 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.6. Exercise - Answer. }

\begin{itemize}

\item  %5.6 
Notice that the subtraction is reversed in one group. Observe that the confidence interval in the second case is for twice the treatment effect.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> t.test((vas.active-vas.plac)[grp==1], (vas.plac-vas.active)[grp==2])

	Welch Two Sample t-test

data:  (vas.active - vas.plac)[grp == 1] and (vas.plac - vas.active)[grp == 2]
t = -3.2517, df = 13.97, p-value = 0.005807
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -130.56481  -26.76853
sample estimates:
mean of x mean of y 
-53.50000  25.16667 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.8.7. Exercise. }

\begin{itemize}

\item  %5.7 
Perform 10 one-sample $t$ tests on simulated normally distributed data sets of 25 observations each. Repeat the experiment, but instead simulate samples from a different distribution; try the t distribution with 2 degrees of freedom and the exponential distribution (in the latter case, test for the mean being equal to 1). Can you find a way to automate this so that you can have a larger number of replications?

%\item  Which statement is incorrect? 
%\begin{enumerate}[(a)]
%\item 
%\item 
%\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.8.7. Exercise - Answer. }

\begin{itemize}

\item  %5.7 
This is the sort of thing \,{\color{blue}\texttt{replicate}} is for. The plot at the end shows a P-P plot with logarithmic axes, showing that extreme p-values tend to be exaggerated.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
t.test(rnorm(25))$p.value       #repeat 10x
t.test(rt(25,df=2))$p.value     #repeat 10x
t.test(rexp(25), mu=1)$p.value  #repeat 10x
x <- replicate(5000, t.test(rexp(25), mu=1)$p.value)
qqplot(sort(x),ppoints(5000),type="l",log="xy")
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.1. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型1
设数据 $X_1,\cdots,X_n$ 是来自正态分布 $N(\mu,\sigma^2)$ 的简单随机样本。
下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item 统计量 $U=\frac{\bar{X}-\mu}{\sigma/\sqrt{n}}$ 服从标准正态分布 $N(0,1)$.
\item 统计量 $T=\frac{\bar{X}-\mu}{S/\sqrt{n}}$ 服从分布 $t(n-1)$.
\item 当总体方差已知的时候，使用 $t$ 统计量，当总体方差未知的时候，使用 $U$ 统计量。
\item 当 $n$ 很大时，自由度为 $n$ 的 $t$ 分布越来越接近标准正态分布。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.1. 单项选择题 }

\begin{itemize}

\item  
解答：(c).
反过来了。当总体方差未知的时候，使用 $t$ 统计量，当总体方差已知的时候，使用 $U$ 统计量。


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.2. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型2

设数据 $X_1,\cdots,X_n$ 来自某未知总体。用符号秩和检验均值是否等于 $m$, 下述步骤中，不正确的是哪个？
\begin{enumerate}[(a)]
\item 将每个数据都减去待检验的均值。
\item 将得到的差值按从小到大分配排名 $1,2,\cdots,n$. 
\item 计算差值符号为正的那些数据的排名的和，记为 $V$.
\item 若真实均值接近待检验的均值，则 $V$ 的值在某个范围内。
\end{enumerate}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.2. 单项选择题 }

\begin{itemize}

\item  
解答：(b).
将得到的差值按绝对值从小到大分配排名 $1,2,\cdots,n$. 



\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.3. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型3
载入 R 软件自带的 \,{\color{blue}\texttt{Nile}} 数据集（一个时间序列）。假设年流量服从正态分布。
运行下述命令，阅读结果。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> Nile
> t.test(Nile, mu=900)
\end{lstlisting}

下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  结果表明无法拒绝年流量的均值为900的零假设。
\item  这个检验统计量的自由度是99.
\item  置信度为 95\% 的置信区间是 $[885.8, 952.9]$.
\item  这是单侧检验。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.3. 单项选择题 }

\begin{itemize}

\item  

解答：(d).
这是双侧检验。




\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.4. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型4
设数据 $X_1,\cdots,X_m$ 来自正态总体 $X\sim N(\mu_1,\sigma_1^2)$, 数据 $Y_1,\cdots,Y_n$ 来自另一个与总体 $X$ 独立的正态总体 $Y\sim N(\mu_2,\sigma_2^2)$. 考虑假设检验 $$H_0: \mu_1=\mu_2, \,\text{ vs. }\, H_1: \mu_1\neq \mu_2. $$

下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item 如果方差 $\sigma_1^2$ 和 $\sigma_2^2$ 已知，那么统计量 $U=\frac{(\bar{X}-\bar{Y})-(\mu_1-\mu_2)}{\text{SEDM}}$ 服从标准正态分布 $N(0,1)$. 
\item 上一个选项中的 $\text{SEDM}=\sqrt{\frac{\sigma_1^2}{m}+\frac{\sigma_2^2}{n}}$, 是随机变量 $X-Y$ 的标准差。
\item 如果方差相等即 $\sigma_1^2=\sigma_2^2$, 但未知，那么统计量 $T=\frac{(\bar{X}-\bar{Y})-(\mu_1-\mu_2)}{\text{SEDM}}$ 服从分布 $t(m+n-2)$. 
\item 上一个选项中的 $\text{SEDM}=S_\omega\sqrt{\frac{1}{m}+\frac{1}{n}}$, 其中 $S_\omega^2=\frac{(m-1)S_x^2+(n-1)S_y^2}{m+n-2}$.
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.4. 单项选择题 }

\begin{itemize}

\item  
解答：(b).
第一个选项中的 $\text{SEDM}$ 是随机变量 $\bar{X}-\bar{Y}$ 的标准差。


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.5. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型5
载入 \,{\color{blue}\texttt{ISwR}} 包里的 \,{\color{blue}\texttt{energy}} 数据框。数据 \,{\color{blue}\texttt{expend}} 按因子 \,{\color{blue}\texttt{stature}} 的不同水平分成了两组。假设这两组 \,{\color{blue}\texttt{expend}} 数据来自两个相互独立的正态分布，并且不假设它们的方差相等。运行下述命令，观察结果。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
library(ISwR)
energy
head(energy)
summary(energy)
t.test(energy$expend~energy$stature)
attach(energy)
t.test(expend~stature)
?t.test
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.5. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item 在显著性水平为 5\% 时，这两组数据的均值显著不相等。
\item 这个统计量的自由度是 $m+n-2=20$. 
\item 这是双样本的均值差的假设检验。
\item 这里默认方差不等且未知，自由度是根据 Welch 方法来计算的。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.5. 单项选择题 }

\begin{itemize}

\item  
解答：(b).
默认按照 Welch 方法估计自由度，程序结果读出是 15.919. 


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.6. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型6
设数据 $X_1,\cdots,X_m$ 来自正态总体 $X\sim N(\mu_1,\sigma_1^2)$, 数据 $Y_1,\cdots,Y_n$ 来自另一个与总体 $X$ 独立的正态总体 $Y\sim N(\mu_2,\sigma_2^2)$. 
考虑假设检验 $$H_0: \sigma_1^2=\sigma_2^2, \,\text{ vs. }\, H_1: \sigma_1^2\neq \sigma_2^2. $$
检验 \,{\color{blue}\texttt{energy}} 数据中，偏胖人群和偏瘦人群的能量消耗数据的方差是否相等。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
var.test(expend~stature)
x <- energy$expend[stature=='lean']
y <- energy$expend[stature=='obese']
m <- length(x)
n <- length(y)
s1sq <- sum((x-mean(x))^2)/(m-1)
s2sq <- sum((y-mean(y))^2)/(n-1)
myf <- s1sq/s2sq
mypvalue <- 2*pf(myf,m-1,n-1)
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.6. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  使用统计量 $F={S_1^2}/{S_2^2}$, 在 $H_0$ 为真时，$F$ 服从 $F(m-1,n-1)$ 分布。
\item  上述程序计算的 $F$ 统计值为 $0.6797$, $p$ 值为 $0.7844$.
\item  检验结果显示无法拒绝方差相等的零假设。
\item  方差比的置信区间包含1，这说明无法拒绝方差相等的零假设。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.6. 单项选择题 }

\begin{itemize}

\item  
解答：(b).
程序计算的 $F$ 统计值为 $0.7844$, $p$ 值为 $0.6797$.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
curve(df(x,12,8),from=0, to=5)
abline(h=0)
abline(v=0)
points(myf,0,pch=16,col='red')
\end{lstlisting}

 \begin{figure}
 \centering
 \includegraphics[height=0.5\textheight, width=0.9\textwidth, keepaspectratio]{exercise-5-9-6.png}
 %\caption{}
 \end{figure}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.7. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型7
变量 \,{\color{blue}\texttt{react}} 是一个数值型向量，包含了两名护士对结核菌素反应大小判断的差。检验其均值是否显著不等于零。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
react
?react
qqnorm(react)
t.test(react,mu=0)
wilcox.test(react,mu=0)
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.7. 单项选择题 }

\begin{itemize}

\item  %7
下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  $t$ 检验的结果表明，要拒绝均值等于零的零假设。
\item  从QQ图看出，数据近似可以看做是服从正态分布的。
\item  非参数检验的结果表明，要拒绝均值等于零的零假设。
\item  $t$ 检验的置信度为95\%的置信区间包含零，因此要拒绝均值等于零的零假设。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.7. 单项选择题 }

\begin{itemize}

\item  
解答：(d).
$t$ 检验的置信度为95\%的置信区间不包含零。


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.8. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型8
数据框 \,{\color{blue}\texttt{vitcap}} 包含24行3列，记录了某个镉工厂工人的年龄和肺活量。分组1表示暴露在工作环境至少10年，分组3表示不曾暴露在工作环境。比较这两组工人的肺活量是否有显著差异。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
head(vitcap)
t.test(vital.capacity~group, data=vitcap)
t.test(vital.capacity~group, data=vitcap, conf=0.99)
wilcox.test(vital.capacity~group,data=vitcap)
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.8. 单项选择题 }

\begin{itemize}

\item  %8
下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  $t$ 检验的统计值为 $-2.9228$, $p$值为 $0.0087$. 
\item  $t$ 检验结果表明，在显著性水平 $\alpha=0.05$ 时，拒绝两组工人的肺活量无显著差异的零假设。
\item  置信度为 99\%的置信区间是 $[-2.06, -0.02]$.
\item  因为 Wilcoxon 秩和检验的 $p$ 值是 $0.1783$, 所以拒绝两组数据的均值差为零的零假设。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.8. 单项选择题 }

\begin{itemize}

\item  
解答：(d).
Wilcoxon 秩和检验的 $p$ 值是 $0.01783$.


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.9. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型9
数据框 \,{\color{blue}\texttt{ashina}} 有16行3列，包含了一个使用一氧化氮合酶抑制剂治疗头痛的交叉试验的数据。数据是相对于基线计算的疼痛程度评分。变量 \,{\color{blue}\texttt{vas.active}} 是给予药物后记录的数据，变量 \,{\color{blue}\texttt{vas.plac}} 是给予安慰剂后记录的数据。第一组是先给予安慰剂，第二组是先给予药物。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
ashina
?ashina
colnames(ashina) <- c('x','y','grp')
attach(ashina)
t.test(x,y)
t.test(x,y,paired=T)
t.test(x[grp==1],y[grp==1])
t.test(x[grp==2],y[grp==2])
\end{lstlisting}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.9. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  普通的双样本检验，因为 $p$ 值为 $0.02099$, 所以拒绝药物与安慰剂的作用无差异的零假设。
\item  配对的双样本检验，因为 $p$ 值为 $0.005644$, 所以拒绝药物与安慰剂的作用无差异的零假设。
\item 先给予安慰剂的那组，因为 $p$ 值为 $0.04415$, 所以拒绝药物与安慰剂的作用无差异的零假设。
\item  先给予药物的那组，因为 $p$ 值为 $0.02113$, 所以拒绝药物与安慰剂的作用无差异的零假设。
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.9.9. 单项选择题 }

\begin{itemize}

\item  
解答：(d).
先给予药物的那组，$p$ 值为 $0.2113$, 无法拒绝药物与安慰剂作用无差异的零假设。


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.10. 单项选择题 }

\begin{itemize}

\item %知识点：数据类型10
生成一些标准正态分布的随机数，并检验其均值是否显著不等于零。运行下述程序。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
x <- rnorm(25)  #1
tx <- t.test(x)  #2
summary(tx)  #3
tx$p.value  #4
class(tx)  #5
\end{lstlisting}

下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  第一行命令产生了25个服从标准正态分布的随机数。
\item  第二行命令用 $t$ 检验，其零假设是数据 \,{\color{blue}\texttt{x}} 的均值等于零。
\item  第四行的结果表明，这个检验的 $p$ 值总是大于显著性水平的。
\item  第五行命令表明，保存这个检验的结果的变量 \,{\color{blue}\texttt{tx}}, 其数据类型是 \,{\color{blue}\texttt{htest}}.
\end{enumerate}


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.9.10. 单项选择题 }

\begin{itemize}

\item  
解答：(c).
不一定的。用 \,{\color{blue}\texttt{rnorm()}} 函数生成25个随机数，其均值可能显著不等于零。虽然是例外情况，但也可能发生。这就是第一类错误（弃真）所描述的事情。实验例如：
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
txp <- replicate(100,t.test(rnorm(25))$p.value)
min(txp)
sum(txp<0.05)  #均值显著不等于零的比例约为5%
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.10.1. 简答题 }

\begin{enumerate}

\item 
%设数据 $X_1,\cdots,X_n$ 来自正态分布 $N(\mu,\sigma^2)$. 
设有来自正态总体 $N(\mu,\sigma^2)$ 的简单随机样本 $(X_1,\cdots,X_n)$. 
\begin{enumerate}
\item  均值 $\bar{X}$ 的标准误是多少？
\item  证明统计量 $U=\frac{\bar{X}-\mu}{\sigma/\sqrt{n}}$ 服从标准正态分布 $N(0,1)$.
\item  证明统计量 $T=\frac{\bar{X}-\mu}{S/\sqrt{n}}$ 服从分布 $t(n-1)$.
\item  比较标准正态分布与 $t$ 分布的异同。
\end{enumerate}


\item 
设数据 $X_1,\cdots,X_n$ 来自某未知总体。
\begin{enumerate}
\item 符号秩和检验可以用来检验什么？
\item Wilcoxon 符号秩和检验的统计量是怎么构造的？
\item 找出该统计量的 $p$ 值的计算方法。
\end{enumerate}



\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.10.3. 简答题 }

\begin{enumerate}\setcounter{enumi}{2}

\item 
设数据 $X_1,\cdots,X_m$ 来自正态分布 $N(\mu_1,\sigma_1^2)$, 数据 $Y_1,\cdots,Y_n$ 来自另一个独立的正态分布 $N(\mu_2,\sigma_2^2)$. 
\begin{enumerate}
\item 证明统计量 $U=\frac{(\bar{X}-\bar{Y})-(\mu_1-\mu_2)}{\text{SEDM}}$ 服从标准正态分布 $N(0,1)$, 
这里 $\text{SEDM}=\sqrt{\frac{\sigma_1^2}{m}+\frac{\sigma_2^2}{n}}$.
\item 设 $\sigma_1^2=\sigma_2^2$ 但未知。 证明统计量 $T=\frac{(\bar{X}-\bar{Y})-(\mu_1-\mu_2)}{\text{SEDM}}$ 服从分布 $t(m+n-2)$, 
这里 $\text{SEDM}=S_\omega\sqrt{\frac{1}{m}+\frac{1}{n}}$, 其中 $S_\omega^2=\frac{(m-1)S_x^2+(n-1)S_y^2}{m+n-2}$.
\end{enumerate}



\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{5.10.4. 简答题 }

\begin{enumerate}\setcounter{enumi}{3}

\item 
载入 ISwR 包里的 \verb+energy+ 数据框。数据 \verb+expend+ 按因子 \verb+stature+ 的不同水平分成了两组。假设这两组 \verb+expend+ 数据来自两个相互独立的正态分布。
\begin{enumerate}
\item 这两组数据的方差是否显著地不相等？
\item 这两组数据的均值在 $t$ 检验下是否显著地不相等？
\item 这两组数据的均值在 Wilcoxon 秩和检验下是否显著地不相等？
\end{enumerate}


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.10.5. 简答题 }

\begin{enumerate}\setcounter{enumi}{4}

\item  研究 ISwR 程序包里的 intake 数据框。假设其第一列数据来自正态分布 $N(\mu,\sigma^2)$. 
\begin{enumerate}
\item  样本均值 $\bar{X}$ 的标准误是多少？
\item 总体均值 $\mu$ 在 t 检验下是否显著地不等于7725 ？解释检验的结果。
\item 总体均值 $\mu$ 在 Wilcoxon 符号秩和检验下是否显著地不等于7725 ？解释检验的结果。
\end{enumerate}

\item  研究 ISwR 程序包里的 intake 数据框。
\begin{enumerate}
\item  什么是配对的样本数据？画出 intake 数据的 Bland-Altman 图。
\item  这个数据是否适合配对 t 检验？
\item  每组配对的数据在 t 检验下是否显著不相等？
\item  每组配对的数据在 Wilcoxon 符号秩和检验下是否显著不相等？
\end{enumerate}


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.10.7. 简答题 }

\begin{enumerate}\setcounter{enumi}{6}

\item  研究 ISwR 程序包里的 react 数据。
\begin{enumerate}
\item  这些数据是否服从正态分布？
\item  这些数据的均值在 t 检验下是否显著不等于零？
\end{enumerate}

\item  研究 ISwR 程序包里的 vitcap 数据。
\begin{enumerate}
\item  两组肺活量的均值在 t 检验下是否显著不相等？
\item  计算两组肺活量的均值差的 99\% 的置信区间。
\end{enumerate}

\item  使用非参数方法，研究 ISwR 程序包里的 react 数据和 vitcap 数据。

\item  函数 shapiro.test() 的原理是什么？使用这个函数对 react 数据进行检验。使用这个函数能测出异常值吗？

\item  研究 ISwR 程序包里的 ashina 数据。分析药物的效果和时间先后的效果。



\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{5.10.12. 简答题 }

\begin{enumerate}\setcounter{enumi}{11}

\item  关于两类错误的实验。
\begin{enumerate}
\item  生成25个正态分布的随机数，并进行 t 检验。记录检验的 $p$ 值。
\item  生成25个自由度为2的 t 分布的随机数，并进行 t 检验。记录检验的 $p$ 值。
\item  生成25个均值为1的指数分布的随机数，并进行 t 检验。记录检验的 $p$ 值。
\item  编程自动实现将上述试验重复10遍。
\end{enumerate}
%Perform 10 one-sample t tests on simulated normally distributed data sets of 25 observations each. Repeat the experiment, but instead simulate samples from a different distribution; try the t distribution with 2 degrees of freedom and the exponential distribution (in the latter case, test for the mean being equal to 1). Can you find a way to automate this so that you can have a larger number of replications?


\item %2021年考试题目2：
设某班的测试成绩如下。设显著性水平为 0.05, 使用 t 检验，推断平均成绩是否等于70分。
\begin{table}[ht!]
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|}\hline
学号 &1&2&3&4&5&6&7&8&9&10\\ \hline
成绩 & 70 &72 & 57 & 59 & 85 & 63 & 56 & 62 & 80 & 89 \\ \hline
\end{tabular}
\end{table}


\end{enumerate}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}


